%%%%%%Complex Contagion and Structural Diversity%%%%%%%

%This script generates the ego-level tables associated with complex contagion and structural diversity, as described in sections S2.7 and S3.5.2 of the Supplementary Materials.
%In more detail, for each ego we extract the number of peers that are
%active on each day, as well as the number of active connected components (i.e. the connected components in the ego's neighborhood in which at
%least one individual is active).


%Output file: "structural_diversity_data.txt"



clear all
clc

%load data
%individual characteristics data: the users we consider in our analysis, i.e. we have running activity data for them
%NOTE(review): no 'Delimiter' option is given; confirm the csv file is split into separate columns as expected
App_Users_Demographic=dataset('File','App_Users_in_Graph_demographics.csv');
%running activity data, one row per day and one column per user number
%(for example distance_mat(3,1) is the distance the individual with user number=1 runs on day 3)
load('distance_mat')
load('duration_mat')
load('run_mat')
load('pace_mat')
load('calories_mat')
load('TimeZone_mat')
load('StartTime_mat')
%weather data
load('PRECIPITATION_mat')
load('TMAX_mat')
%running-buddy network
load('USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations')


%calendar of observation days as a column of datenums
%NOTE(review): first_day_of_observations, last_day_of_observations and USERREL_USEDFOR_SOCIAL_INFLUENCE are not
%created anywhere in this script; presumably they come from one of the loaded .mat files - confirm
daynumbers_consideration=(datenum(first_day_of_observations):datenum(last_day_of_observations))';
%position of the day on which the first link was created; the model is worked from that day onwards
%NOTE(review): if that day is not inside the calendar, l is empty and everything below becomes empty
l=find(daynumbers_consideration==min(USERREL_USEDFOR_SOCIAL_INFLUENCE.created_datenumber));
daynumbers_consideration=daynumbers_consideration(l:end);
%drop the same leading days from every day-indexed matrix
distance_mat=distance_mat(l:end,:);
duration_mat=duration_mat(l:end,:);
run_mat=run_mat(l:end,:);
pace_mat=pace_mat(l:end,:);
calories_mat=calories_mat(l:end,:);
TimeZone_mat=TimeZone_mat(l:end,:);
StartTime_mat=StartTime_mat(l:end,:);
PRECIPITATION_mat=PRECIPITATION_mat(l:end,:);
TMAX_mat=TMAX_mat(l:end,:);




%%%%%%%%%%%%%%% time-invariant covariates of individuals %%%%%%%%%
%age: year of joining minus year of birth
age_vec=year(App_Users_Demographic.JOIN_DATE)-App_Users_Demographic.BIRTH_DATE_YEAR;
%gender code: 1=male, 2=female, 3=unidentified (or empty), 0=any other label
gender_labels=App_Users_Demographic.GENDER;
gender_vec=zeros(size(age_vec));
gender_vec(strcmpi(gender_labels,'M'))=1; %males
gender_vec(strcmpi(gender_labels,'F'))=2; %females
gender_vec(strcmpi(gender_labels,'U') | strcmp(gender_labels,''))=3; %unidentified
%height
height_vec=App_Users_Demographic.HEIGHT;
%weight
weight_vec=App_Users_Demographic.WEIGHT;
%device
device_vec=App_Users_Demographic.DEVICE_ID;
%country: every list holds the spellings that are mapped to one country code
   usa={'US', 'USA', 'USA ', 'United States', 'United States of America','United+States', 'United+States+of+America', 'Usa', 'Us', 'uS', 'uSA','united States', 'united States of America', 'united states', 'united states of america', 'united+states', 'us', 'usa'};
   uk={'England', 'GB', 'Great Britain', 'United Kingdom', 'Uk', 'uk','United+Kingdom', 'england', 'gB', 'gb', 'uK', 'united kingdom','united+kingdom'};
   jp= {'JAPAN', 'JP', 'jp', 'jpn','japan'};
   ca={ 'CA', 'CAN', 'CANADA', 'CAnada', 'Canada', 'Canada ', 'Canada+','ca', 'canada'};
   de={'DE', 'GE', 'GER', 'Germany', 'de', 'germany','ge','ger'};
   es={'ES', 'ESP', 'Es', 'Esp', 'Espa%C3%B1a', 'Espa&ntilde;a','catalonia', 'es', 'espa%C3%B1a', 'espa&ntilde;a', 'espa?a','SPAIN','spain'};
   br={ 'BR', 'BRASIL', 'BRA', 'BRAZIL', 'Brasil', 'Brazil', 'bR', 'br', 'brasil'};
   au={ 'AU', 'AUSTRALIA', 'AUT', 'Australia', 'Australia ', 'au', 'australia'};
   fr= {'FR', 'FRANCE', 'France', 'fr', 'france'};
   mx={ 'MX', 'MEXICO', 'MEX', 'Mexico', 'mexico', 'mx'};
   nl={ 'NL', 'Netherlands', 'Nederland', 'nederland', 'netherlands','nl', 'the netherlands'};
   tw={ 'TW','TAIWAN', 'Taiwan', 'tw','taiwan'};
%the position of a list below is the country code it receives (usa=1 ... tw=12); unmatched users keep code 0
country_aliases={usa,uk,jp,ca,de,es,br,au,fr,mx,nl,tw};
reported_country=App_Users_Demographic.COUNTRY;
country_vec=zeros(size(age_vec));
for country_code=1:numel(country_aliases)
    aliases=country_aliases{country_code};
    for alias_idx=1:numel(aliases)
        country_vec(strcmp(reported_country,aliases{alias_idx}))=country_code;
    end
end




%consider all the links where the weather of the two ends is not correlated
corr_threshold=0.025; %weather correlation threshold
weather_uncorrelated=USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations.weather_corr_same_day<=corr_threshold;
links_for_consideration=USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations(weather_uncorrelated,7:9);
%links_for_consideration are in the form : created_datenumber    FROM_USER_NUM    TO_USER_NUM

%build the network adjacency matrix, find its connected components and then
%partition the largest component into communities.
All_Unique_users_in_links=unique([links_for_consideration.FROM_USER_NUM;links_for_consideration.TO_USER_NUM]);
n_users=length(All_Unique_users_in_links);
%map every link end to its row/column position; the matrix is assembled
%directly in sparse form instead of filling a dense n_users-by-n_users array link by link
[~,from_pos]=ismember(links_for_consideration.FROM_USER_NUM,All_Unique_users_in_links);
[~,to_pos]=ismember(links_for_consideration.TO_USER_NUM,All_Unique_users_in_links);
mapped=from_pos>0 & to_pos>0; %ends that cannot be matched (e.g. NaN user numbers) are skipped
Adj_mat=spones(sparse(from_pos(mapped),to_pos(mapped),1,n_users,n_users)); %repeated links still count once
%NOTE(review): Adj_mat holds FROM->TO entries only and is not symmetrised here, whereas the
%ego-level adjacency further below is; confirm that the link table stores both directions of every tie
[k,l]=graphconncomp(Adj_mat); %l(n) is the component label of user All_Unique_users_in_links(n)
size_of_components=[(1:k)' accumarray(l(:),1,[k 1])]; %columns: component id, number of members
un_egos_num=unique(links_for_consideration.FROM_USER_NUM);
[C,ia,ib]=intersect(un_egos_num,All_Unique_users_in_links);
ego_component_ids=repmat(l(ib),length(daynumbers_consideration),1); %one row per day, one column per ego

%Nodes in the largest component (the first one is taken when several components tie for the largest size)
[~,largest_component_id]=max(size_of_components(:,2));
Nodes_largest_comp=All_Unique_users_in_links(l==largest_component_id);
n_largest=length(Nodes_largest_comp);
%adjacency matrix of the largest component: links with both ends inside it
[from_inside,from_lc]=ismember(links_for_consideration.FROM_USER_NUM,Nodes_largest_comp);
[to_inside,to_lc]=ismember(links_for_consideration.TO_USER_NUM,Nodes_largest_comp);
inside=from_inside & to_inside;
largest_cmp_adj_mat=spones(sparse(from_lc(inside),to_lc(inside),1,n_largest,n_largest));
[com,Q] = community_detection_algorithm(full(largest_cmp_adj_mat)); %community detection on the largest component using the detection algorithm. Citation: "Newman, Mark EJ. "Fast algorithm for detecting community structure in networks." Physical review E 69 066133 (2004).".
%replace the label of the largest component by its community labels, shifted past the existing component labels
%NOTE(review): assumes com holds one positive integer label per node, in the order of Nodes_largest_comp - confirm
l(l==largest_component_id)=com+max(l);
size_of_components=[(1:max(l))' accumarray(l(:),1,[max(l) 1])];

%community label of every ego, repeated for every day under consideration
%(the original referenced Days_num, which is never defined; the table built below needs one row per
%entry of daynumbers_consideration)
ego_communities_ids=repmat(l(ib),length(daynumbers_consideration),1);



%%%%find the unique egos
%the link table built above is links_for_consideration; links_noprcp_no_tmax_corr, used here originally, is never defined
un_egos_num=unique(links_for_consideration.FROM_USER_NUM);
length(un_egos_num)
%long-format table skeleton: one row per (day, ego) pair, with all days of the first ego first
n_table_rows=length(daynumbers_consideration)*length(un_egos_num);
ego_level_same_day_noprectmaxcorr=[];
ego_level_same_day_noprectmaxcorr.ego_num=reshape(repmat(un_egos_num',length(daynumbers_consideration),1),n_table_rows,1);
ego_level_same_day_noprectmaxcorr.ego_community_id=reshape(ego_communities_ids,n_table_rows,1);
ego_level_same_day_noprectmaxcorr.day_num=reshape(repmat(daynumbers_consideration,1,length(un_egos_num)),n_table_rows,1);
ego_level_same_day_noprectmaxcorr.month_num=reshape(repmat(month(daynumbers_consideration),1,length(un_egos_num)),n_table_rows,1);
ego_level_same_day_noprectmaxcorr.year_num=reshape(repmat(year(daynumbers_consideration),1,length(un_egos_num)),n_table_rows,1);

%daily distance of every ego (rows: days, columns: egos in the order of un_egos_num)
ego_distance_t_mat=distance_mat(:,un_egos_num);


%blank out the days that precede the creation of each ego's first link
%(the link table is links_for_consideration; links_noprcp_no_tmax_corr is never defined)
for i=1:length(un_egos_num)
    linkscreatednumbers=links_for_consideration.created_datenumber(links_for_consideration.FROM_USER_NUM==un_egos_num(i));
    days_before_first_link=min(linkscreatednumbers)-daynumbers_consideration(1);
    ego_distance_t_mat(1:days_before_first_link,i)=NaN; %empty range when the link exists from the first day
end


%distance lagged by one, two and three days; the first rows have no lagged value and are set to NaN
n_ego_columns=size(ego_distance_t_mat,2);
ego_distance_t_1_mat=[NaN(1,n_ego_columns);ego_distance_t_mat(1:end-1,:)];
ego_distance_t_2_mat=[NaN(2,n_ego_columns);ego_distance_t_mat(1:end-2,:)];
ego_distance_t_3_mat=[NaN(3,n_ego_columns);ego_distance_t_mat(1:end-3,:)];

%store as long-format columns (all days of the first ego first)
n_table_rows=length(daynumbers_consideration)*length(un_egos_num);
ego_level_same_day_noprectmaxcorr.ego_distance_t=reshape(ego_distance_t_mat,n_table_rows,1);
ego_level_same_day_noprectmaxcorr.ego_distance_t_1=reshape(ego_distance_t_1_mat,n_table_rows,1);
ego_level_same_day_noprectmaxcorr.ego_distance_t_2=reshape(ego_distance_t_2_mat,n_table_rows,1);
ego_level_same_day_noprectmaxcorr.ego_distance_t_3=reshape(ego_distance_t_3_mat,n_table_rows,1);

%%%%find the number of active friends and number of active components each ego has%%%%%%
%day-by-ego result matrices, filled column by column in the ego loop below
[ego_no_of_connected_comp_mat, ...
 no_of_components_who_run_t_mat, ...
 no_of_active_friends_t_mat, ...
 ego_connectivity_t_mat, ...
 average_friends_connectivity_mat]=deal(zeros(length(daynumbers_consideration),length(un_egos_num)));

%copies of the activity matrices that the ego loops read from
run_mat_wnan=run_mat;
distance_mat_wnan=distance_mat;


%time invariant characteristics of friends: one entry per ego
[av_fr_age_vec, ...
 av_fr_height_vec, ...
 av_fr_weight_vec, ...
 portion_of_friends_that_male_vec, ...
 portion_of_friends_US_vec, ...
 portion_of_friends_UK_vec, ...
 portion_of_friends_JP_vec, ...
 portion_of_friends_CA_vec, ...
 portion_of_friends_wristdevice_vec, ...
 portion_of_friends_application1_vec, ...
 portion_of_friends_application2_vec]=deal(zeros(1,length(un_egos_num)));






%For every ego: summarise the friends' time-invariant characteristics and compute, per day, the
%number of linked friends, the number of friends who ran before the ego, the number of connected
%components among the linked friends and the number of components with at least one active friend.
for i=1:length(un_egos_num)

    %rows of the link table that start at this ego; friends_num and linkscreatednumbers are
    %aligned entry by entry (the link table is links_for_consideration; links_noprcp_no_tmax_corr is never defined)
    ego_link_rows=links_for_consideration.FROM_USER_NUM==un_egos_num(i);
    friends_num=links_for_consideration.TO_USER_NUM(ego_link_rows);
    linkscreatednumbers=links_for_consideration.created_datenumber(ego_link_rows);
    n_friends=length(friends_num);
    n_days=length(daynumbers_consideration);

    %time invariant characteristics of the friends in the neighborhood of the ego.
    %Averages divide by the number of friends with a known value (the original divided by the
    %total number of friends, which pulls the average towards zero); NaN when no value is known.
    av_fr_age_vec(i)=nansum(age_vec(friends_num))/sum(~isnan(age_vec(friends_num)));
    av_fr_height_vec(i)=nansum(height_vec(friends_num))/sum(~isnan(height_vec(friends_num)));
    av_fr_weight_vec(i)=nansum(weight_vec(friends_num))/sum(~isnan(weight_vec(friends_num)));
    %shares are counted among the ego's friends only (the original counted the whole population)
    portion_of_friends_that_male_vec(i)=sum(gender_vec(friends_num)==1)/n_friends;

    portion_of_friends_US_vec(i)=sum(country_vec(friends_num)==1)/n_friends;
    portion_of_friends_UK_vec(i)=sum(country_vec(friends_num)==2)/n_friends;
    portion_of_friends_JP_vec(i)=sum(country_vec(friends_num)==3)/n_friends;
    portion_of_friends_CA_vec(i)=sum(country_vec(friends_num)==4)/n_friends;
    portion_of_friends_wristdevice_vec(i)=sum(device_vec(friends_num)==1)/n_friends;
    portion_of_friends_application1_vec(i)=sum(device_vec(friends_num)==2)/n_friends;
    portion_of_friends_application2_vec(i)=sum(device_vec(friends_num)==3)/n_friends;

    %average connectivity of the friends of ego: for every friend, the number of
    %outgoing links created on or before each day
    connectivity_of_friends=zeros(n_days,n_friends);
    for ik=1:n_friends
        friends_of_friends_created_datenumber=sort(links_for_consideration.created_datenumber(links_for_consideration.FROM_USER_NUM==friends_num(ik)));
        for jk=1:length(friends_of_friends_created_datenumber)
            connectivity_of_friends(daynumbers_consideration>=friends_of_friends_created_datenumber(jk),ik)=jk;
        end
    end
    average_friends_connectivity_mat(:,i)=mean(connectivity_of_friends,2);

    %time varying characteristics
    %keep only friend activity that started before the ego's activity on the same day;
    %start times are shifted by the time zone offset to a common clock (GMT+00 per the original comment)
    ego_time=StartTime_mat(:,un_egos_num(i))-TimeZone_mat(:,un_egos_num(i));
    friends_time=StartTime_mat(:,friends_num)-TimeZone_mat(:,friends_num);
    considered_case=(friends_time-repmat(ego_time,1,n_friends))<0;
    run_mat_f=run_mat_wnan(:,friends_num);
    run_mat_f(~considered_case & run_mat_f>0)=0;
    distance_mat_f=distance_mat_wnan(:,friends_num);
    distance_mat_f(~considered_case & distance_mat_f>0)=0;

    %a friend is unobserved (NaN) on the days before the link to that friend was created
    for j=1:n_friends
        days_before_link=linkscreatednumbers(j)-daynumbers_consideration(1);
        run_mat_f(1:days_before_link,j)=NaN;
        distance_mat_f(1:days_before_link,j)=NaN;
    end

    all_friends_missing=all(isnan(run_mat_f),2);  %days on which no friend has a value
    all_friends_present=all(~isnan(run_mat_f),2); %days on which every friend has a value
    days_before_first_link=min(linkscreatednumbers)-daynumbers_consideration(1);

    no_of_active_friends_t_mat(:,i)=sum(run_mat_f>0,2);
    no_of_active_friends_t_mat(all_friends_missing,i)=NaN;
    ego_connectivity_t_mat(:,i)=sum(~isnan(run_mat_f),2);
    ego_connectivity_t_mat(all_friends_missing,i)=NaN;

    no_of_active_friends_t_mat(1:days_before_first_link,i)=NaN;
    ego_connectivity_t_mat(1:days_before_first_link,i)=NaN;

    %adjacency among the friends of the ego, taken from the full link table. One pass over the
    %link table per friend (instead of one per pair of friends). As in the original, only a link
    %from the earlier-listed friend to the later-listed one is looked up before symmetrising.
    directed_friend_links=zeros(n_friends,n_friends);
    for pii=1:n_friends
        link_targets=USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations.TO_USER_NUM(USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations.FROM_USER_NUM==friends_num(pii));
        directed_friend_links(pii,:)=reshape(ismember(friends_num,link_targets),1,[]);
    end
    ego_adj_mat=triu(directed_friend_links,1);
    ego_adj_mat=ego_adj_mat+ego_adj_mat';
    [kk,ll]=graphconncomp(sparse(ego_adj_mat)); %kk components; ll(j) is the component label of friend j

    %number of connected components among the friends that are observed on each day
    ego_no_of_connected_comp_mat(isnan(ego_connectivity_t_mat(:,i)),i)=NaN;
    ego_no_of_connected_comp_mat(all_friends_present,i)=kk;
    ego_no_of_connected_comp_mat(all_friends_missing,i)=NaN;

    %days with only part of the friends observed: recount the components on the observed subset
    b=find(~all_friends_present & ~all_friends_missing & ~isnan(ego_no_of_connected_comp_mat(:,i)));
    for bi=1:length(b)
        friends_num_subset_ind=find(~isnan(run_mat_f(b(bi),:)));
        ego_adj_mat_sub=ego_adj_mat(friends_num_subset_ind,friends_num_subset_ind);
        [kkk,lll]=graphconncomp(sparse(ego_adj_mat_sub));
        ego_no_of_connected_comp_mat(b(bi),i)=kkk;
    end

    %number of components (labels of the full friend graph) with at least one friend who ran that day
    cmp_ru_t=repmat(ll,n_days,1).*(run_mat_f>0);
    %one entry per day: size(...,1), not length(...), which would be the number of friends
    %whenever an ego has more friends than there are days
    no_of_components_who_run_t=zeros(size(cmp_ru_t,1),1);
    for uj=1:n_days
        active_labels=unique(cmp_ru_t(uj,:));
        active_labels(active_labels==0)=[];
        no_of_components_who_run_t(uj)=length(active_labels);
    end
    no_of_components_who_run_t(isnan(ego_connectivity_t_mat(:,i)))=NaN;
    no_of_components_who_run_t_mat(:,i)=no_of_components_who_run_t;
    no_of_components_who_run_t_mat(all_friends_missing,i)=NaN;

    no_of_components_who_run_t_mat(1:days_before_first_link,i)=NaN;
    ego_no_of_connected_comp_mat(1:days_before_first_link,i)=NaN;

    %progress indicator
    if mod(i,10000)==0
        i
    end

end
  
%Append the friend characteristics and the daily neighborhood measures to the ego-level table.
%Rows are (day, ego) pairs with all days of the first ego first; the column order below is the
%order in which the fields are added.
n_days=length(daynumbers_consideration);
n_table_rows=n_days*length(un_egos_num);

%per-ego values: repeated on every day before being stacked into one column
friend_static_columns={ ...
    'av_fr_age',                       av_fr_age_vec; ...
    'av_fr_height',                    av_fr_height_vec; ...
    'av_fr_weight',                    av_fr_weight_vec; ...
    'portion_of_friends_that_male',    portion_of_friends_that_male_vec; ...
    'portion_of_friends_US',           portion_of_friends_US_vec; ...
    'portion_of_friends_UK',           portion_of_friends_UK_vec; ...
    'portion_of_friends_JP',           portion_of_friends_JP_vec; ...
    'portion_of_friends_CA',           portion_of_friends_CA_vec; ...
    'portion_of_friends_wristdevice',  portion_of_friends_wristdevice_vec; ...
    'portion_of_friends_application1', portion_of_friends_application1_vec; ...
    'portion_of_friends_application2', portion_of_friends_application2_vec};
for c=1:size(friend_static_columns,1)
    per_day_copy=repmat(friend_static_columns{c,2},n_days,1);
    ego_level_same_day_noprectmaxcorr.(friend_static_columns{c,1})=reshape(per_day_copy,n_table_rows,1);
end

%day-by-ego matrices: stacked column by column
daily_columns={ ...
    'ego_connectivity_t',         ego_connectivity_t_mat; ...
    'av_fr_connectivity_t',       average_friends_connectivity_mat; ...
    'no_of_active_friends_t',     no_of_active_friends_t_mat; ...
    'ego_no_of_connected_comp_t', ego_no_of_connected_comp_mat; ...
    'no_of_components_who_run_t', no_of_components_who_run_t_mat};
for c=1:size(daily_columns,1)
    ego_level_same_day_noprectmaxcorr.(daily_columns{c,1})=reshape(daily_columns{c,2},n_table_rows,1);
end

    

%%%%%%%%%%%%%Move to Weather%%%%%%%%%%%%%

%%%%Find the precipitation over seasonal average%%%%%%

% compute the seasonal average of precipitation within 2 months centered on day t.
% The window spans 30 days on either side of day t and is cut off at the first and last day of
% the series, so it also works when the series is shorter than a full window (the original three
% loops indexed out of range in that case); for longer series the result is unchanged.
half_window=30;
n_weather_days=size(PRECIPITATION_mat,1);
PRECIPITATION_seasonal_average_mat=zeros(size(PRECIPITATION_mat));
for i=1:n_weather_days
    window_rows=max(1,i-half_window):min(n_weather_days,i+half_window);
    %NOTE(review): missing values are skipped in the sum but still counted in the divisor,
    %i.e. they act as zero precipitation - confirm that this is intended
    PRECIPITATION_seasonal_average_mat(i,:)=nansum(PRECIPITATION_mat(window_rows,:),1)./length(window_rows);
end

%compute the excess of precipitation over the seasonal value; negative excess is set to zero, NaN stays NaN
PRECIPITATION_over_seasonal_average_mat=PRECIPITATION_mat-PRECIPITATION_seasonal_average_mat;
PRECIPITATION_over_seasonal_average_mat(PRECIPITATION_over_seasonal_average_mat<0)=0;


%%%%EGO WEATHER%%%%
%binary indicator equal to 1 if the precipitation that ego experiences is larger than the seasonal average and 0 otherwise (NaN where the weather is unknown)
ego_excess_prcp=PRECIPITATION_over_seasonal_average_mat(:,un_egos_num);
ego_prcp_t=(ego_excess_prcp>0)+0;
ego_prcp_t(isnan(ego_excess_prcp))=NaN;
%binary indicator equal to 1 if the maximum temperature is below 34.4 or above 304.4 and 0 otherwise; NaN where
%the temperature or the running record is missing. The temperature matrix loaded and trimmed above is TMAX_mat
%(T_mat, used here originally, is never defined).
%NOTE(review): the original comment reads the thresholds as 3.44 and 30.4 Celsius, which suggests TMAX_mat is in tenths of a degree Celsius - confirm
T_mat_t=(TMAX_mat<34.4 | TMAX_mat>304.4)+0;
T_mat_t(isnan(TMAX_mat))=NaN;
T_mat_t(isnan(run_mat))=NaN;
ego_tmax_t=T_mat_t(:,un_egos_num);

%weather indicators lagged by one, two and three days (the first rows have no lagged value)
n_ego_columns=size(ego_prcp_t,2);
ego_prcp_t_1=[NaN(1,n_ego_columns);ego_prcp_t(1:end-1,:)];
ego_prcp_t_2=[NaN(2,n_ego_columns);ego_prcp_t(1:end-2,:)];
ego_prcp_t_3=[NaN(3,n_ego_columns);ego_prcp_t(1:end-3,:)];

ego_tmax_t_1=[NaN(1,n_ego_columns);ego_tmax_t(1:end-1,:)];
ego_tmax_t_2=[NaN(2,n_ego_columns);ego_tmax_t(1:end-2,:)];
ego_tmax_t_3=[NaN(3,n_ego_columns);ego_tmax_t(1:end-3,:)];


%store the same-day indicators as long-format columns
ego_level_same_day_noprectmaxcorr.ego_prcp_t=reshape(ego_prcp_t,length(daynumbers_consideration)*length(un_egos_num),1);
ego_level_same_day_noprectmaxcorr.ego_tmax_t=reshape(ego_tmax_t,length(daynumbers_consideration)*length(un_egos_num),1);


%%%%create the instruments%%%%

%%%%%%%%CREATE INSTRUMENTS%%%%%%%%
%every instrument is a day-by-ego matrix, initialised to zero and filled in the loop below
instrument_size=[length(daynumbers_consideration),length(un_egos_num)];

%precipitation-based instruments
[rain_0_t,rain_1_t,rain_2_t,rain_3_t,rain_4_t,rain_5_t, ...
 rain_6_t,rain_7_t,rain_8_t,rain_9_t,rain_10_t,rain_11_t, ...
 friends_rain_t]=deal(zeros(instrument_size));

%temperature-based instruments
[tmax_0_t,tmax_1_t,tmax_2_t,tmax_3_t,tmax_4_t, ...
 tmax_5_t,tmax_6_t,tmax_7_t,tmax_8_t,tmax_9_t, ...
 is_temp_t]=deal(zeros(instrument_size));



%For every ego: count, per day, how many distinct friend weather series satisfy each
%precipitation / temperature condition. The counts are the weather instruments.
for i=1:length(un_egos_num)

    %links that start at this ego (the link table is links_for_consideration;
    %links_noprcp_no_tmax_corr, used here originally, is never defined)
    ego_link_rows=links_for_consideration.FROM_USER_NUM==un_egos_num(i);
    friends_num=links_for_consideration.TO_USER_NUM(ego_link_rows);
    linkscreatednumbers=links_for_consideration.created_datenumber(ego_link_rows);
    n_days=length(daynumbers_consideration);

    %find the friends that experience a unique set of weather. In other words here we identify
    %the unique set of cities in which the friends are located: one friend is kept per distinct precipitation series.
    [q,ik,ij]=unique(PRECIPITATION_mat(:,friends_num)','rows');
    friends_num=friends_num(ik);
    %all three matrices are taken for the de-duplicated friends so that their columns line up
    %(the original kept every friend in the precipitation matrix, so the NaN mask below was
    %applied to the wrong columns and duplicated cities were still counted)
    PRECIPITATION_friends_mat=PRECIPITATION_mat(:,friends_num);
    TMAX_friends_mat=TMAX_mat(:,friends_num);
    run_fr_mat=run_mat_wnan(:,friends_num);
    %weather is ignored on the days on which the friend has no running record
    PRECIPITATION_friends_mat(isnan(run_fr_mat))=NaN;
    TMAX_friends_mat(isnan(run_fr_mat))=NaN;

    %per-friend reference levels over the whole period, repeated on every day.
    %max_prec_mat keeps its original name but holds the mean precipitation of each friend.
    max_prec_mat=repmat(nansum(PRECIPITATION_friends_mat,1)./sum(~isnan(PRECIPITATION_friends_mat),1),n_days,1);
    max_tmax_mat=repmat(max(TMAX_friends_mat,[],1),n_days,1);
    mean_tmax_mat=repmat(nansum(TMAX_friends_mat,1)./sum(~isnan(TMAX_friends_mat),1),n_days,1);
    min_tmax_mat=repmat(min(TMAX_friends_mat,[],1),n_days,1);

    %precipitation counts. Column 1: friends with zero precipitation; columns 2-7: below the mean
    %divided by 20, 10, 5, 4, 2, 1; columns 8-12: below the mean times 1.5, 2, 5, 7, 10.
    %A comparison with NaN is false, so missing entries never add to a count.
    rain_counts=zeros(n_days,12);
    rain_counts(:,1)=sum(PRECIPITATION_friends_mat==0,2);
    prcp_divisors=[20 10 5 4 2 1];
    for s=1:numel(prcp_divisors)
        rain_counts(:,1+s)=sum(PRECIPITATION_friends_mat<(max_prec_mat/prcp_divisors(s)),2);
    end
    prcp_multipliers=[1.5 2 5 7 10];
    for s=1:numel(prcp_multipliers)
        rain_counts(:,7+s)=sum(PRECIPITATION_friends_mat<(max_prec_mat*prcp_multipliers(s)),2);
    end
    rain_counts(all(isnan(PRECIPITATION_friends_mat),2),:)=NaN; %no friend with known precipitation that day

    %temperature counts: thresholds placed between each friend's minimum, mean and maximum temperature
    low_span=mean_tmax_mat-min_tmax_mat;
    high_span=max_tmax_mat-mean_tmax_mat;
    tmax_flags={ ...
        TMAX_friends_mat<(min_tmax_mat+low_span/5), ...
        TMAX_friends_mat<(min_tmax_mat+2*low_span/5), ...
        TMAX_friends_mat>=(min_tmax_mat+3.5*low_span/5), ...
        TMAX_friends_mat>=(min_tmax_mat+4*low_span/5), ...
        TMAX_friends_mat<mean_tmax_mat, ...
        TMAX_friends_mat>=mean_tmax_mat, ...
        TMAX_friends_mat>=(max_tmax_mat-high_span/5), ...
        TMAX_friends_mat>=(max_tmax_mat-2*high_span/5), ...
        TMAX_friends_mat>=(max_tmax_mat-3*high_span/5), ...
        TMAX_friends_mat>=(max_tmax_mat-4*high_span/5)};
    tmax_counts=zeros(n_days,numel(tmax_flags));
    for s=1:numel(tmax_flags)
        tmax_counts(:,s)=sum(tmax_flags{s},2);
    end
    tmax_counts(all(isnan(TMAX_friends_mat),2),:)=NaN; %no friend with known temperature that day

    %no instrument before the ego's first link was created
    days_before_first_link=min(linkscreatednumbers)-daynumbers_consideration(1);
    rain_counts(1:days_before_first_link,:)=NaN;
    tmax_counts(1:days_before_first_link,:)=NaN;

    %copy the counts into the per-instrument matrices
    rain_0_t(:,i)=rain_counts(:,1);
    rain_1_t(:,i)=rain_counts(:,2);
    rain_2_t(:,i)=rain_counts(:,3);
    rain_3_t(:,i)=rain_counts(:,4);
    rain_4_t(:,i)=rain_counts(:,5);
    rain_5_t(:,i)=rain_counts(:,6);
    rain_6_t(:,i)=rain_counts(:,7);
    rain_7_t(:,i)=rain_counts(:,8);
    rain_8_t(:,i)=rain_counts(:,9);
    rain_9_t(:,i)=rain_counts(:,10);
    rain_10_t(:,i)=rain_counts(:,11);
    rain_11_t(:,i)=rain_counts(:,12);

    tmax_0_t(:,i)=tmax_counts(:,1);
    tmax_1_t(:,i)=tmax_counts(:,2);
    tmax_2_t(:,i)=tmax_counts(:,3);
    tmax_3_t(:,i)=tmax_counts(:,4);
    tmax_4_t(:,i)=tmax_counts(:,5);
    tmax_5_t(:,i)=tmax_counts(:,6);
    tmax_6_t(:,i)=tmax_counts(:,7);
    tmax_7_t(:,i)=tmax_counts(:,8);
    tmax_8_t(:,i)=tmax_counts(:,9);
    tmax_9_t(:,i)=tmax_counts(:,10);

    %progress indicator
    if mod(i,10000)==0
        i
    end

end


%Append the instruments to the ego-level table as long-format columns
%(fr_rain_0_t ... fr_rain_11_t followed by fr_tmax_0_t ... fr_tmax_9_t).
n_table_rows=length(daynumbers_consideration)*length(un_egos_num);

rain_instruments={rain_0_t,rain_1_t,rain_2_t,rain_3_t,rain_4_t,rain_5_t, ...
                  rain_6_t,rain_7_t,rain_8_t,rain_9_t,rain_10_t,rain_11_t};
for r=1:numel(rain_instruments)
    %the index in the column name starts at 0
    ego_level_same_day_noprectmaxcorr.(sprintf('fr_rain_%d_t',r-1))=reshape(rain_instruments{r},n_table_rows,1);
end

tmax_instruments={tmax_0_t,tmax_1_t,tmax_2_t,tmax_3_t,tmax_4_t, ...
                  tmax_5_t,tmax_6_t,tmax_7_t,tmax_8_t,tmax_9_t};
for r=1:numel(tmax_instruments)
    ego_level_same_day_noprectmaxcorr.(sprintf('fr_tmax_%d_t',r-1))=reshape(tmax_instruments{r},n_table_rows,1);
end


%Convert the collected columns to a dataset array, drop incomplete observations and
%write the result to "structural_diversity_data.txt" as comma-separated text with a header line.
ego_level_same_day_noprectmaxcorr=struct2dataset(ego_level_same_day_noprectmaxcorr);

%drop observations without an ego distance
%(the original tested isnan(isnan(...)), which is never true, so nothing was removed)
l=find(isnan(ego_level_same_day_noprectmaxcorr.ego_distance_t));
ego_level_same_day_noprectmaxcorr(l,:)=[];

%drop observations without any linked friend
l=find(isnan(ego_level_same_day_noprectmaxcorr.ego_connectivity_t));
ego_level_same_day_noprectmaxcorr(l,:)=[];

%drop observations without a precipitation instrument
l=find(isnan(ego_level_same_day_noprectmaxcorr.fr_rain_0_t));
ego_level_same_day_noprectmaxcorr(l,:)=[];

size(ego_level_same_day_noprectmaxcorr)
%fprintf consumes the data column by column, so the table is transposed to put one observation in each column
b = double(ego_level_same_day_noprectmaxcorr);
b=permute(b,[2,1]);
var_names=ego_level_same_day_noprectmaxcorr.Properties.VarNames;

tic
fid = fopen('structural_diversity_data.txt', 'wt');
if fid==-1
    error('Could not open structural_diversity_data.txt for writing.');
end
%header line: the variable names separated by commas
for i=1:length(var_names)-1
    fprintf(fid, '%s,',var_names{i});
end
fprintf(fid, '%s',var_names{end});
fprintf(fid, '\n');
%one line per observation: the first two columns as integers, the remaining ones as floating point.
%The format is generated from the number of columns; a fixed format with a different number of
%fields would be recycled across observations and put the line breaks in the wrong places.
row_format=['%d,%d' repmat(',%f',1,length(var_names)-2) '\n'];
fprintf(fid, row_format, b);
fclose(fid);
toc





